# Replication Script for 
# "Coordinating Nominations: How to Deal with 
# an Incumbent Surplus after Electoral Reform"
# published in Japanese Journal of Political Science
# author: Jochen Rehmert

# Script replicates:
# Figure: 4

# directory
setwd("...")

# packages 
library(foreign);library(stargazer);library(MASS)
library(sandwich);library(lmtest);library(ggplot2)

# load data
dat <- read.dta("coordination1.dta")

# Hausman test
hausman.iia <- function(dat = dat, strata.var = "smd", choice.var = "name_en", dv.var = "nominated", form  , reps = 100, seed = 1104){   
  require(MASS, quietly = TRUE);require(survival, quietly = TRUE);require(plyr, quietly = TRUE)
  if(is.formula(form)==FALSE){"Please provide the form argument as 'formula' object."}
  out <- c()
  dat.tmp <- dat
  set.seed(seed)
  for(ii in 1:reps){
    smpl.formopp <- sample(dat.tmp[[strata.var]],1  )
    smpl.gov <- dat.tmp[[choice.var]][which(dat.tmp[[dv.var]] == 1 & dat.tmp[[strata.var]] %in% smpl.formopp)]
    smpl.opp <- dat.tmp[[choice.var]][which(dat.tmp[[dv.var]] == 0 & dat.tmp[[strata.var]] %in% smpl.formopp)]
    gov.ptys <- unique(unlist(strsplit(smpl.gov, "; ")))
    opp.ptys <- unique(unlist(strsplit(smpl.opp, "; ")))
    opp.ptys <- opp.ptys[which(!opp.ptys %in% gov.ptys)]
    leave.out.pty <- sample(opp.ptys, 1)
    
    smpl.formopp2 <- gsub("\\..*", "", smpl.formopp)
    
    dat.tmp[["formopp2"]] <- dat.tmp[[strata.var]]
    dat.tmp[["formopp2"]] <- gsub("\\..*", "" ,as.character(dat.tmp[["formopp2"]]))
    
    dat.tmp$tmp.sample <- 1
    dat.tmp[["tmp.sample"]][which(dat.tmp[["formopp2"]] == smpl.formopp2)] <- as.numeric(!as.numeric(lapply(strsplit(dat.tmp[[choice.var]][which(dat.tmp[["formopp2"]] == smpl.formopp2)], "; "), function(x) any(unlist(x) %in% leave.out.pty))))
    
    frm <- form
    mod_f <- clogit(frm, data = dat.tmp)
    mod_r <- clogit(frm, data = dat.tmp[dat.tmp$tmp.sample == 1, ])
    
    b_f <- coef(mod_f)
    b_r <- coef(mod_r)
    vcov_f <- vcov(mod_f)
    vcov_r <- vcov(mod_r)
    
    if(det(vcov_r - vcov_f)==0){next}
    
    hm <- t(b_r-b_f) %*% solve(vcov_r - vcov_f) %*% (b_r-b_f)
    df <- length(b_r)
    p.value <- pchisq(hm, df,  lower.tail = FALSE, log.p = FALSE)
    out <- rbind(out,cbind(p_value = p.value, party = leave.out.pty )  )
    colnames(out) <- c("p_value","party")
    
    cat(ii, leave.out.pty, "\n")}
  out <- as.data.frame(out)
  out[,1] <- as.numeric(as.character(out[,1]))
  bonferroni <- p.adjust(out[,1], method = "bonferroni", n = length(1:reps))
  return(list(out, bonferroni,
              Interpretation = "If smaller than 0.05, significant difference. I.o.w., not independent of alternative"))   
  
}

###########################
# Test for IIA assumption #
###########################
summary(nom.tmp.inc <- clogit(nominated ~  borninku + mainfaction+  exp_voteshare_cand  + strata(smd) , 
                              data = dat[dat$incumbent == 1 & dat$ldp_cand == 1 & dat$candidate_type != "new"  ,]))

summary(nom.tmp <- clogit(nominated ~    borninku + mainfaction+  exp_voteshare_cand + incumbent + strata(smd) , 
                          data = dat[dat$ldp_cand == 1 & dat$candidate_type != "new"  ,]))

sample.smds <- names(which(table(dat[row.names(dat) %in% row.names(model.matrix(nom.tmp)), "smd"]) != 1))
sample.smds.inc <- names(which(table(dat[row.names(dat) %in% row.names(model.matrix(nom.tmp.inc)), "smd"]) != 1))

# Model 1
summary(rr.nom.1 <- clogit(nominated ~   exp_voteshare_cand + age_rel + sen_rel  +  borninku + mainfaction  + strata(smd) , 
                           robust =TRUE, method = "efron" , model = T,
                           data = dat[dat$incumbent == 1 & dat$ldp_cand == 1 & dat$sample == 1  & dat$smd %in% sample.smds.inc  ,]))

### IIA test
form <- as.formula("nominated ~ exp_voteshare_cand + age_rel + sen_rel + borninku + mainfaction + strata(smd)")
hausman.iia(dat = dat, 
            strata.var = "smd",
            choice.var = "name_en",
            dv.var = "nominated", form = form)


###########################################
# Distribution of Predicted Probabilities #
###########################################
mod.1 <- dat[as.numeric(row.names(rr.nom.1$model)), 
             c(names(rr.nom.1$coefficients), "smd", "name_en", "nominated")]

# linear predictions
mod.1$linpreds <- as.matrix(mod.1[,names(rr.nom.1$coefficients)])  %*%  rr.nom.1$coefficients
# sum of linear predictions within districts
sumpreds <- tapply(mod.1$linpreds, mod.1$smd, function(x) sum(exp(x))  )
# conditional predictions
mod.1$preds <- NA
mod.1$preds <- unlist(lapply( names(sumpreds), 
                              function(x) exp(mod.1$linpreds[mod.1$smd == x])/sumpreds[which(names(sumpreds)==x)])  )
mod.1$preds <- round(mod.1$preds, 5)

# effective number of districts over which pred Pr distribute
out <- c()
for(nn in unique(mod.1$name_en)){
  tmp <- mod.1[mod.1$name_en == nn, c("nominated","smd","preds")]
  sum.tmp <- sum(tmp$preds)
  tmp$preds <- tmp$preds/sum.tmp # normalize predictions across districs
  enp <- 1/sum(tmp$preds^2)
  out <- rbind(out, cbind(nn,tmp, enp))
}
out$nn <- as.character(out$nn)
out2 <- unique(out[ , c("nn","enp")])

###########################################################################
# Figure 4: Probablities and Effective Number of Districts for Candidates #
###########################################################################
# histogram
hist(out2$enp, breaks = 35, main = "",
     xlab = "")
